##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
source(paste0(root.dir, '../step2/scripts/myRLib.R'))
intermediate.files <- strsplit('../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/covar_LDpred_p1.0000e-03.txt:../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/covar_LDpred_p3.0000e-04.txt:../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/covar_LDpred_p1.0000e-04.txt', ':')[[1]]
ps <- strsplit('0.001,0.0003,0.0001', ',')[[1]]
expression.files <- strsplit('output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Artery_Coronary_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/DGN-HapMap-2015_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Whole_Blood_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Cells_EBV-transformed_lymphocytes_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Thyroid_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Stomach_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Small_Intestine_Terminal_Ileum_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Esophagus_Muscularis_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Spleen_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Pancreas_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Esophagus_Gastroesophageal_Junction_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Adrenal_Gland_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Adipose_Visceral_Omentum_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Adipose_Subcutaneous_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Pituitary_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Colon_Transverse_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Esophagus_Mucosa_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Liver_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Lung_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Muscle_Skeletal_predicted_expression.txt:output-run_step3/fastInsulin__wtccc_t2d__wtccc_ctrl/Cells_Transformed_fibroblasts_predicted_expression.txt', ':')[[1]]
ts <- strsplit('Artery_Coronary,DGN-HapMap-2015,Whole_Blood,Cells_EBV-transformed_lymphocytes,Thyroid,Stomach,Small_Intestine_Terminal_Ileum,Esophagus_Muscularis,Spleen,Pancreas,Esophagus_Gastroesophageal_Junction,Adrenal_Gland,Adipose_Visceral_Omentum,Adipose_Subcutaneous,Pituitary,Colon_Transverse,Esophagus_Mucosa,Liver,Lung,Muscle_Skeletal,Cells_Transformed_fibroblasts', ',')[[1]]
gene <- read.table(getFile(root.dir, 'output-prepare_gene_id/fastInsulin__wtccc_t2d__wtccc_ctrl.genelist'), header = F, sep = '\t')
colnames(gene) <- c('gene.name', 'gencode.id')
yi <- read.table(getFile(root.dir, '../step2/output-direction_XT_YI/fastInsulin__wtccc_t2d__wtccc_ctrl/yi.logistic.assoc'), header = T)
cols <- colnames(yi)
yi.filenames <- apply(yi, 1, function(x) { return(basename(x[1]))})
yi.container <- c()
expression.list <- list()
for(i in 1 : length(ts)) {
expression.list[[ts[i]]] <- read.table(getFile(root.dir, expression.files[i]), header = T, sep = ' ')
}
intermediate.list <- list()
for(i in 1 : length(ps)) {
thisfile <- basename(intermediate.files[i])
yi.container <- rbind(yi.container, c(ps[i], yi[yi.filenames == thisfile, 2:7]))
intermediate.list[[ps[i]]] <- read.table(getFile(root.dir, intermediate.files[i]), header = T, sep = ' ')
intermediate.list[[ps[i]]]$PHENO <- intermediate.list[[ps[i]]]$PHENO - 1
}
yi.container <- data.frame(yi.container)
colnames(yi.container) <- c('causal.fraction', cols[2:7])
pander(gene, knitr.auto.asis = T)
##
## --------------------------------
## gene.name gencode.id
## ----------- --------------------
## RAB38 ENSG00000123892.7
##
## UHRF1 ENSG00000034063.9
##
## UHRF1BP1 ENSG00000065060.12
##
## UHRF1BP1L ENSG00000111647.8
##
## ANKS1A ENSG00000064999.12
##
## HRH1 ENSG00000196639.6
##
## LGR5 ENSG00000139292.8
##
## TSPAN8 ENSG00000127324.4
## --------------------------------
ye.container <- c()
inter <- intermediate.list[[ps[i]]]
temp.container <- c()
for (t in names(expression.list)) {
expre <- expression.list[[t]]
df <- inner_join(inter, expre, by = "IID")
for (g in gene[, 2]) {
if (sum(abs(expre[, g])) != 0) {
this.expre <- scale(expre[, g])
} else {
this.expre <- expre[, g]
}
data <- data.frame(y = inter$PHENO, e = this.expre)
model <- glm(y ~ e, family = binomial(link = "logit"),
data = data)
stats <- summary(model)
temp.container <- rbind(temp.container, c(t, g, stats$coefficients[1],
stats$coefficients[3], stats$coefficients[7], stats$coefficients[2],
stats$coefficients[4], stats$coefficients[8]))
}
}
temp.container <- data.frame(temp.container)
colnames(temp.container) <- c("tissue", "gene", "intercept.estmate",
"intercept.std", "intercept.pval", "E.estmate", "E.std",
"E.pval")
temp.container <- left_join(temp.container, gene, by = c(gene = "gencode.id"))
temp.container <- cleanDF(temp.container, c("intercept.estmate",
"intercept.std", "intercept.pval", "E.estmate", "E.std",
"E.pval"))
ncol <- length(unique(temp.container[!is.na(temp.container$E.pval),
"gene.name"]))/3
if (ncol == floor(ncol)) {
ncol <- floor(ncol)
} else {
ncol <- floor(ncol) + 1
}
p <- ggplot(temp.container[!is.na(temp.container$E.pval), ],
aes(x = tissue)) + geom_point(aes(y = E.estmate)) + geom_errorbar(aes(ymin = E.estmate -
1.96 * E.std, ymax = E.estmate + 1.96 * E.std), colour = "black",
width = 0.1) + geom_hline(yintercept = 0, color = "red") +
coord_flip() + theme(axis.text.x = element_text(angle = 90,
hjust = 1)) + geom_text(aes(y = E.estmate, label = paste0("pval = ",
formatC(E.pval, format = "e", digits = 2))), vjust = -0.5,
hjust = 0.5, size = 4) + ggtitle("Y ~ E: correlation estimate") +
ylab("Regression coefficient") + xlab("Expression Tissue") +
facet_wrap(~gene.name, scales = "free_x", ncol = 3)
subchunkify(p, fig_asp = ncol)
ye.container <- rbind(ye.container, temp.container)
ie.container <- c()
for (i in 1:length(intermediate.list)) {
cat("##", ps[i])
cat("\n")
cat("\n")
inter <- intermediate.list[[ps[i]]]
temp.container <- c()
for (t in names(expression.list)) {
expre <- expression.list[[t]]
df <- inner_join(inter, expre, by = "IID")
for (g in gene[, 2]) {
if (sum(abs(expre[, g])) != 0) {
this.expre <- scale(expre[, g])
} else {
this.expre <- expre[, g]
}
data <- data.frame(i = inter$PRS, e = this.expre)
model <- lm(i ~ e, data = data)
stats <- summary(model)
temp.container <- rbind(temp.container, c(ps[i],
t, g, stats$coefficients[1], stats$coefficients[3],
stats$coefficients[7], stats$coefficients[2],
stats$coefficients[4], stats$coefficients[8]))
}
}
temp.container <- data.frame(temp.container)
colnames(temp.container) <- c("causal.fraction", "tissue",
"gene", "intercept.estmate", "intercept.std", "intercept.pval",
"E.estmate", "E.std", "E.pval")
temp.container <- left_join(temp.container, gene, by = c(gene = "gencode.id"))
temp.container <- cleanDF(temp.container, c("intercept.estmate",
"intercept.std", "intercept.pval", "E.estmate", "E.std",
"E.pval"))
ncol <- length(unique(temp.container[!is.na(temp.container$E.pval),
"gene.name"]))/3
if (ncol == floor(ncol)) {
ncol <- floor(ncol)
} else {
ncol <- floor(ncol) + 1
}
p <- ggplot(temp.container[!is.na(temp.container$E.pval),
], aes(x = tissue)) + geom_point(aes(y = E.estmate)) +
geom_errorbar(aes(ymin = E.estmate - 1.96 * E.std, ymax = E.estmate +
1.96 * E.std), colour = "black", width = 0.1) + geom_hline(yintercept = 0,
color = "red") + coord_flip() + theme(axis.text.x = element_text(angle = 90,
hjust = 1)) + geom_text(aes(y = E.estmate, label = paste0("pval = ",
formatC(E.pval, format = "e", digits = 2))), vjust = -0.5,
hjust = 0.5, size = 4) + ggtitle("I ~ E: correlation estimate") +
ylab("Regression coefficient") + xlab("Expression Tissue") +
facet_wrap(~gene.name, scales = "free_x", ncol = 3)
subchunkify(p, fig_asp = ncol)
ie.container <- rbind(ie.container, temp.container)
cat("\n\n")
}
yei.container <- c()
for (i in 1:length(intermediate.list)) {
cat("##", ps[i])
cat("\n")
cat("\n")
inter <- intermediate.list[[ps[i]]]
temp.container <- c()
for (t in names(expression.list)) {
expre <- expression.list[[t]]
df <- inner_join(inter, expre, by = "IID")
for (g in gene[, 2]) {
if (sum(abs(expre[, g])) != 0) {
this.expre <- scale(expre[, g])
} else {
this.expre <- expre[, g]
}
data <- data.frame(y = inter$PHENO, i = inter$PRS,
e = this.expre)
model <- glm(y ~ e + i, family = binomial(link = "logit"),
data = data)
stats <- summary(model)
temp.container <- rbind(temp.container, c(ps[i],
t, g, stats$coefficients[2], stats$coefficients[5],
stats$coefficients[11], stats$coefficients[3],
stats$coefficients[6], stats$coefficients[12]))
}
}
temp.container <- data.frame(temp.container)
colnames(temp.container) <- c("causal.fraction", "tissue",
"gene", "E.estmate", "E.std", "E.pval", "I.estmate",
"I.std", "I.pval")
temp.container <- left_join(temp.container, gene, by = c(gene = "gencode.id"))
temp.container <- cleanDF(temp.container, c("I.estmate",
"I.std", "I.pval", "E.estmate", "E.std", "E.pval"))
ncol <- length(unique(temp.container[!is.na(temp.container$I.pval),
"gene.name"]))/3
if (ncol == floor(ncol)) {
ncol <- floor(ncol)
} else {
ncol <- floor(ncol) + 1
}
p1 <- ggplot(temp.container[!is.na(temp.container$I.pval),
], aes(x = tissue)) + geom_point(aes(y = I.estmate)) +
geom_errorbar(aes(ymin = I.estmate - 1.96 * I.std, ymax = I.estmate +
1.96 * I.std), colour = "black", width = 0.1) + geom_hline(yintercept = 0,
color = "red") + geom_hline(yintercept = yi.container[yi.container$causal.fraction ==
ps[i], "prs.estmate"][[1]], color = "blue") + coord_flip() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
geom_text(aes(y = I.estmate, label = paste0("pval = ",
formatC(E.pval, format = "e", digits = 2))), vjust = -0.5,
hjust = 0.3, size = 4) + ggtitle("Y ~ E + I (I): correlation estimate") +
ylab("Regression coefficient") + xlab("Expression Tissue") +
facet_wrap(~gene.name, scales = "free_x", ncol = 3)
subchunkify(p1, fig_asp = ncol)
temp <- ye.container[, c("tissue", "gene.name", "E.estmate",
"E.std", "E.pval")]
temp$marginal <- T
temp2 <- temp.container[, c("tissue", "gene.name", "E.estmate",
"E.std", "E.pval")]
temp2$marginal <- F
plot.e <- rbind(temp2, temp)
ncol <- length(unique(plot.e[!is.na(plot.e$E.pval), "gene.name"]))/3
if (ncol == floor(ncol)) {
ncol <- floor(ncol)
} else {
ncol <- floor(ncol) + 1
}
dodge <- position_dodge(width = 0.5)
p2 <- ggplot(plot.e[!is.na(plot.e$E.pval), ], aes(x = tissue,
color = marginal)) + geom_point(aes(y = E.estmate), position = dodge) +
geom_errorbar(aes(ymin = E.estmate - 1.96 * E.std, ymax = E.estmate +
1.96 * E.std), width = 0.1, position = dodge) + geom_hline(yintercept = 0,
color = "black") + coord_flip() + theme(axis.text.x = element_text(angle = 90,
hjust = 1)) + geom_text(aes(y = E.estmate, label = paste0("pval = ",
formatC(E.pval, format = "e", digits = 2)), vjust = -0.3,
hjust = 0.3), size = 3, position = dodge) + ggtitle("Y ~ E + I (E): correlation estimate") +
ylab("Regression coefficient") + xlab("Expression Tissue") +
facet_wrap(~gene.name, scales = "free_x", ncol = 3)
subchunkify(p2, fig_asp = ncol)
yei.container <- rbind(yei.container, temp.container)
cat("\n\n")
}